/*
 * TOP SECRET Copyright 2006-2015 Transsion.com All right reserved. This software is the confidential and proprietary
 * information of Transsion.com ("Confidential Information"). You shall not disclose such Confidential Information and
 * shall use it only in accordance with the terms of the license agreement you entered into with Transsion.com.
 */
package com.yunji.framework_template.biz.crawler.in;

import java.net.URI;
import java.net.URISyntaxException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

import javax.annotation.Resource;

import org.springframework.stereotype.Service;

import com.yunji.framework_template.biz.crawler.ContentImageHandler;
import com.yunji.framework_template.biz.crawler.NewsCrawler;
import com.yunji.framework_template.biz.crawler.SourceType;
import com.yunji.framework_template.common.enumeration.CountryCode;
import com.yunji.framework_template.common.enumeration.NewsType;

/**
 * News crawler for the India Today site ({@code indiatoday.in}) and the Aaj Tak site
 * ({@code aajtak.intoday.in}). <br/>
 * Date: 2018-12-18 15:31:24 <br/>
 *
 * @author fenglibin1982@163.com
 * @Blog http://blog.csdn.net/fenglibing
 */
@Service
public class IndiatodayNewsCrawler extends NewsCrawler {

    /** Fragment that must appear in the host name of an India Today link. */
    private static final String INDIATODAY_HOST_PART = "indiatoday.in";

    /** Exact host name of the Aaj Tak site. */
    private static final String AAJTAK_HOST          = "aajtak.intoday.in";

    /** Script pseudo-scheme prefix; links starting with it are never crawlable. */
    private static final String JAVASCRIPT_PREFIX    = "javascript";

    @Resource
    private IndiatodayNewsImageHandler indiatodayNewsImageHandler;

    /**
     * Decides whether a link looks like a news detail page of one of the supported sites.
     * <p>
     * A link is accepted when its host contains {@code indiatoday.in} or equals
     * {@code aajtak.intoday.in}, and one of the following holds:
     * <ul>
     * <li>splitting the path on {@code /} yields at least 4 elements and the last element, split on
     * {@code -}, yields at least 3 parts;</li>
     * <li>the host is {@code aajtak.intoday.in}, splitting the path on {@code /} yields at least 3
     * elements, and the url contains {@code .html} after its first character.</li>
     * </ul>
     *
     * @param url the link to test; may be {@code null}
     * @return {@code true} if the link matches one of the rules above; {@code false} otherwise,
     *         including for {@code null}, blank, {@code mailto}/{@code javascript}, host-less and
     *         syntactically invalid links
     */
    @Override
    public boolean isOkUrl(String url) {
        if (url == null || url.trim().length() == 0) {
            return false;
        }
        // regionMatches(ignoreCase) is locale-independent, unlike toLowerCase() with the default locale.
        if (url.startsWith("mailto")
            || url.regionMatches(true, 0, JAVASCRIPT_PREFIX, 0, JAVASCRIPT_PREFIX.length())) {
            return false;
        }

        final URI uri;
        try {
            uri = new URI(url);
        } catch (URISyntaxException ignored) {
            // Deliberately not propagated: a link that cannot be parsed is simply not a candidate.
            return false;
        }

        String host = uri.getHost();
        if (host == null) {
            return false;
        }
        // The Aaj Tak host does not contain "indiatoday.in", so it has to be admitted explicitly;
        // otherwise the Aaj Tak rule below could never be reached.
        boolean aajtakHost = AAJTAK_HOST.equals(host);
        if (!aajtakHost && host.indexOf(INDIATODAY_HOST_PART) < 0) {
            return false;
        }

        // The leading "/" of the path produces an empty first element, which counts towards the length.
        String[] pathArr = uri.getPath().split("/");
        if (pathArr.length >= 4 && pathArr[pathArr.length - 1].split("-").length >= 3) {
            return true;
        }
        return aajtakHost && pathArr.length >= 3 && url.indexOf(".html") > 0;
    }

    /**
     * Returns the injected image handler used for content crawled by this crawler.
     *
     * @return the {@link IndiatodayNewsImageHandler} injected into this bean
     */
    @Override
    public ContentImageHandler getContentImageHandler() {
        return indiatodayNewsImageHandler;
    }

    /**
     * Builds the list of seed pages, each paired with the {@link NewsType} assigned to it.
     * <p>
     * A new list is created on every call.
     * <p>
     * NOTE(review): many section/type pairings look arbitrary (for example {@code /elections} is
     * paired with {@code SPORT} and {@code /sports} with {@code ASKANDANSWER}) -- confirm whether this
     * is intentional before relying on the type of a seed.
     *
     * @return the seed pages with their news types, in declaration order
     */
    @Override
    public List<SourceType> getSourceTypeList() {
        List<SourceType> sourceTypeList = new ArrayList<SourceType>();

        // India Today: top-level sections.
        sourceTypeList.add(SourceType.builder().url("https://www.indiatoday.in").newsType(NewsType.HOT).build());
        sourceTypeList.add(SourceType.builder().url("https://www.indiatoday.in/videos").newsType(NewsType.IMAGE).build());
        sourceTypeList.add(SourceType.builder().url("https://www.indiatoday.in/elections").newsType(NewsType.SPORT).build());
        sourceTypeList.add(SourceType.builder().url("https://www.indiatoday.in/india").newsType(NewsType.LIFESTYLE).build());
        sourceTypeList.add(SourceType.builder().url("https://www.indiatoday.in/movies").newsType(NewsType.FUN).build());
        sourceTypeList.add(SourceType.builder().url("https://www.indiatoday.in/technology").newsType(NewsType.HOUSE).build());
        sourceTypeList.add(SourceType.builder().url("https://www.indiatoday.in/sports").newsType(NewsType.ASKANDANSWER).build());
        sourceTypeList.add(SourceType.builder().url("https://www.indiatoday.in/fact-check").newsType(NewsType.HEALTH).build());
        sourceTypeList.add(SourceType.builder().url("https://www.indiatoday.in/trending-news").newsType(NewsType.MILITARY).build());
        sourceTypeList.add(SourceType.builder().url("https://www.indiatoday.in/world").newsType(NewsType.TECH).build());
        sourceTypeList.add(SourceType.builder().url("https://www.indiatoday.in/fyi").newsType(NewsType.FINANCE).build());
        sourceTypeList.add(SourceType.builder().url("https://www.indiatoday.in/education-today").newsType(NewsType.NEWS).build());
        sourceTypeList.add(SourceType.builder().url("https://www.indiatoday.in/lifestyle").newsType(NewsType.BUSINESS).build());
        sourceTypeList.add(SourceType.builder().url("https://www.indiatoday.in/television").newsType(NewsType.ENTERTAINMENT).build());
        sourceTypeList.add(SourceType.builder().url("https://www.indiatoday.in/auto").newsType(NewsType.BLOG).build());
        sourceTypeList.add(SourceType.builder().url("https://www.indiatoday.in/photo").newsType(NewsType.MAN).build());
        sourceTypeList.add(SourceType.builder().url("https://www.indiatoday.in/mail-today").newsType(NewsType.WOMAN).build());
        sourceTypeList.add(SourceType.builder().url("https://www.indiatoday.in/business").newsType(NewsType.KIDS).build());
        sourceTypeList.add(SourceType.builder().url("https://www.indiatoday.in/programmes").newsType(NewsType.POLITICS).build());

        // India Today: sports sub-sections.
        sourceTypeList.add(SourceType.builder().url("https://www.indiatoday.in/sports/cricket").newsType(NewsType.HOT).build());
        sourceTypeList.add(SourceType.builder().url("https://www.indiatoday.in/sports/football").newsType(NewsType.EDUCATION).build());
        sourceTypeList.add(SourceType.builder().url("https://www.indiatoday.in/sports/tennis").newsType(NewsType.VIDEO).build());
        sourceTypeList.add(SourceType.builder().url("https://www.indiatoday.in/full-fixtures").newsType(NewsType.IMAGE).build());
        sourceTypeList.add(SourceType.builder().url("https://www.indiatoday.in/sports/formula-one").newsType(NewsType.SPORT).build());
        sourceTypeList.add(SourceType.builder().url("https://www.indiatoday.in/sports/wwe").newsType(NewsType.LIFESTYLE).build());
        sourceTypeList.add(SourceType.builder().url("https://www.indiatoday.in/sports/badminton").newsType(NewsType.FUN).build());
        sourceTypeList.add(SourceType.builder().url("https://www.indiatoday.in/sports/athletics").newsType(NewsType.HOUSE).build());
        sourceTypeList.add(SourceType.builder().url("https://www.indiatoday.in/sports/hockey").newsType(NewsType.ASKANDANSWER).build());
        sourceTypeList.add(SourceType.builder().url("https://www.indiatoday.in/sports/golf").newsType(NewsType.HEALTH).build());
        sourceTypeList.add(SourceType.builder().url("https://www.indiatoday.in/sports/other-sports").newsType(NewsType.MILITARY).build());
        sourceTypeList.add(SourceType.builder().url("https://www.indiatoday.in/sports/hockey-world-cup-2018").newsType(NewsType.CAR).build());
        sourceTypeList.add(SourceType.builder().url("https://www.indiatoday.in/ipl-2018").newsType(NewsType.TECH).build());

        // India Today: technology sub-sections.
        sourceTypeList.add(SourceType.builder().url("https://www.indiatoday.in/technology/news").newsType(NewsType.HOT).build());
        sourceTypeList.add(SourceType.builder().url("https://www.indiatoday.in/technology/reviews").newsType(NewsType.EDUCATION).build());
        sourceTypeList.add(SourceType.builder().url("https://www.indiatoday.in/technology/features").newsType(NewsType.VIDEO).build());
        sourceTypeList.add(SourceType.builder().url("https://www.indiatoday.in/technology/talking-points").newsType(NewsType.IMAGE).build());
        sourceTypeList.add(SourceType.builder().url("https://www.indiatoday.in/technology/video").newsType(NewsType.SPORT).build());
        sourceTypeList.add(SourceType.builder().url("https://www.indiatoday.in/technology/buying-guide").newsType(NewsType.LIFESTYLE).build());
        sourceTypeList.add(SourceType.builder().url("https://www.indiatoday.in/technology/tech-tips").newsType(NewsType.FUN).build());

        // Aaj Tak pages.
        sourceTypeList.add(SourceType.builder().url("https://aajtak.intoday.in/news.html").newsType(NewsType.HOT).build());
        sourceTypeList.add(SourceType.builder().url("https://aajtak.intoday.in/national.html").newsType(NewsType.EDUCATION).build());
        sourceTypeList.add(SourceType.builder().url("https://aajtak.intoday.in/world-news.html").newsType(NewsType.VIDEO).build());
        sourceTypeList.add(SourceType.builder().url("https://aajtak.intoday.in/sports").newsType(NewsType.IMAGE).build());
        sourceTypeList.add(SourceType.builder().url("https://aajtak.intoday.in/news-on-films.html").newsType(NewsType.SPORT).build());
        sourceTypeList.add(SourceType.builder().url("https://aajtak.intoday.in/business.html").newsType(NewsType.LIFESTYLE).build());
        sourceTypeList.add(SourceType.builder().url("https://aajtak.intoday.in/states-news.html").newsType(NewsType.FUN).build());
        sourceTypeList.add(SourceType.builder().url("https://aajtak.intoday.in/mobile.html").newsType(NewsType.HOUSE).build());
        sourceTypeList.add(SourceType.builder().url("https://aajtak.intoday.in/videos.html").newsType(NewsType.ASKANDANSWER).build());
        sourceTypeList.add(SourceType.builder().url("https://aajtak.intoday.in/videos/national.html").newsType(NewsType.HEALTH).build());
        sourceTypeList.add(SourceType.builder().url("https://aajtak.intoday.in/videos/world.html").newsType(NewsType.MILITARY).build());
        sourceTypeList.add(SourceType.builder().url("https://aajtak.intoday.in/videos/sports.html").newsType(NewsType.CAR).build());
        sourceTypeList.add(SourceType.builder().url("https://aajtak.intoday.in/videos/entertainment.html").newsType(NewsType.TECH).build());
        sourceTypeList.add(SourceType.builder().url("https://aajtak.intoday.in/videos/tech.html").newsType(NewsType.FINANCE).build());
        sourceTypeList.add(SourceType.builder().url("https://aajtak.intoday.in/videos/exclusive.html").newsType(NewsType.NEWS).build());
        sourceTypeList.add(SourceType.builder().url("https://aajtak.intoday.in/videos/non-stop-100-news-videos.html").newsType(NewsType.BUSINESS).build());
        sourceTypeList.add(SourceType.builder().url("https://aajtak.intoday.in/photos.html").newsType(NewsType.ENTERTAINMENT).build());
        sourceTypeList.add(SourceType.builder().url("https://aajtak.intoday.in/photos/national.html").newsType(NewsType.BLOG).build());
        sourceTypeList.add(SourceType.builder().url("https://aajtak.intoday.in/photos/world.html").newsType(NewsType.MAN).build());
        sourceTypeList.add(SourceType.builder().url("https://aajtak.intoday.in/photos/sports.html").newsType(NewsType.WOMAN).build());
        sourceTypeList.add(SourceType.builder().url("https://aajtak.intoday.in/photos/entertainment.html").newsType(NewsType.KIDS).build());
        sourceTypeList.add(SourceType.builder().url("https://aajtak.intoday.in/photos/lifestyle.html").newsType(NewsType.FOOD).build());
        sourceTypeList.add(SourceType.builder().url("https://aajtak.intoday.in/photos/relationship.html").newsType(NewsType.POLITICS).build());
        sourceTypeList.add(SourceType.builder().url("https://aajtak.intoday.in/photos/dharma.html").newsType(NewsType.HOT).build());
        sourceTypeList.add(SourceType.builder().url("https://aajtak.intoday.in/photos/tourism.html").newsType(NewsType.EDUCATION).build());
        sourceTypeList.add(SourceType.builder().url("https://aajtak.intoday.in/photos/business.html").newsType(NewsType.VIDEO).build());
        sourceTypeList.add(SourceType.builder().url("https://aajtak.intoday.in/tech/").newsType(NewsType.IMAGE).build());
        sourceTypeList.add(SourceType.builder().url("https://aajtak.intoday.in/moviemasala/").newsType(NewsType.SPORT).build());
        sourceTypeList.add(SourceType.builder().url("https://aajtak.intoday.in/crime/").newsType(NewsType.LIFESTYLE).build());
        sourceTypeList.add(SourceType.builder().url("https://aajtak.intoday.in/sports/").newsType(NewsType.FUN).build());
        sourceTypeList.add(SourceType.builder().url("https://aajtak.intoday.in/agendaaajtak/2018/").newsType(NewsType.HOUSE).build());

        return sourceTypeList;
    }

    /**
     * Returns the country codes associated with this crawler.
     *
     * @return a new mutable set containing only the name of {@link CountryCode#IN}
     */
    @Override
    public Set<String> getCountryCodeSet() {
        Set<String> countryCodeSet = new HashSet<String>();
        countryCodeSet.add(CountryCode.IN.name());
        return countryCodeSet;
    }
}
